here::i_am("R/TemporalStability.R")
library(here)
library(readr)
library(ggplot2)
library(tidyr)
library(dplyr)
library(scales)
library(purrr)

# Data preparation---------------------
df <- read_csv(here("data/tidy/temporal_robustness.csv"), show_col_types = FALSE)

# Filter out the "Total" category for individual category analysis
df_categories <- df %>%
  filter(`Primary Input Category` != "Total")

# Keep only the Total for aggregate analysis
df_total <- df %>%
  filter(`Primary Input Category` == "Total")

# Create long format for easier plotting
df_long <- df_categories %>%
  select(year, `Primary Input Category`, 
         `Final Demand Dependency Share`, 
         `Total Trade Dependency Share`,
         `Intermediate Share`,
         `Multiplier Effect`) %>%
  pivot_longer(cols = c(`Final Demand Dependency Share`, 
                        `Total Trade Dependency Share`,
                        `Intermediate Share`,
                        `Multiplier Effect`),
               names_to = "metric", 
               values_to = "value") %>%
  mutate(
    metric = case_when(
      metric == "Final Demand Dependency Share" ~ "Final Demand\nDependency (%)",
      metric == "Total Trade Dependency Share" ~ "Total Trade\nDependency (%)", 
      metric == "Intermediate Share" ~ "Intermediate\nShare (%)",
      metric == "Multiplier Effect" ~ "Multiplier\nEffect"
    ),
    category = case_when(
      `Primary Input Category` == "Employee Compensation" ~ "Employee\nCompensation",
      `Primary Input Category` == "Net Mixed Income" ~ "Net Mixed\nIncome",
      `Primary Input Category` == "Net Operating Surplus" ~ "Net Operating\nSurplus"
    )
  )
# Primary visualization----------------
colors <- c("#2E86AB", "#A23B72", "#F18F01")

# Create the main multi-panel plot
p_main <- ggplot(df_long, aes(x = year, y = value, color = category)) +
  geom_line(size = 1.2, alpha = 0.8) +
  geom_point(size = 2.5, alpha = 0.9) +
  facet_wrap(~metric, scales = "free_y", ncol = 2) +
  scale_color_manual(
    values = colors, name = "Primary Input\nCategory"
    ) +
  scale_x_continuous(
    breaks = function(x) seq(floor(min(x)), ceiling(max(x)), 1)
    ) +
  scale_y_continuous(labels = label_percent(scale = 1)) + 
  labs(
    title = "Temporal Robustness of Global South Trade Dependencies",
    subtitle = "Dependency measures across primary input categories over time",
    x = "Year",
    y = "Value",
    caption = "Source: EORA26 multi-regional input-output database"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5, margin = margin(b = 20)),
    strip.text = element_text(size = 11, face = "bold"),
    legend.position = "bottom",
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 9),
    axis.text.x = element_text(angle = 0, hjust = 0.5),
    panel.grid.minor = element_blank(),
    panel.spacing = unit(1, "lines")
  )
p_main

ggsave(here("output/Appendix_temporal_robustness.png"), p_main, 
       width = 12, height = 8, dpi = 300, bg = "white")

# Supplementary tables-----------------
## Summary table-----------------------
table_summary <- df_categories %>%
  group_by(`Primary Input Category`) %>%
  summarise(
    `Mean (%)` = round(mean(`Total Trade Dependency Share`), 1),
    `Std Dev` = round(sd(`Total Trade Dependency Share`), 1),
    `Min (%)` = round(min(`Total Trade Dependency Share`), 1),
    `Max (%)` = round(max(`Total Trade Dependency Share`), 1),
    `CV` = round(sd(`Total Trade Dependency Share`) / mean(`Total Trade Dependency Share`), 3),
    `Years Available` = n(),
    .groups = 'drop'
  ) %>%
  bind_rows(
    df_total %>%
      summarise(
        `Primary Input Category` = "Total",
        `Mean (%)` = round(mean(`Total Trade Dependency Share`), 1),
        `Std Dev` = round(sd(`Total Trade Dependency Share`), 1),
        `Min (%)` = round(min(`Total Trade Dependency Share`), 1),
        `Max (%)` = round(max(`Total Trade Dependency Share`), 1),
        `CV` = round(sd(`Total Trade Dependency Share`) / mean(`Total Trade Dependency Share`), 3),
        `Years Available` = n()
      )
  )

### Save nb to be included into qmd file---------
write_csv(table_summary, here("data/tidy/temporal_summary_table.csv"))


## Trend analysis------------
table_summary <- df_categories %>%
  group_by(`Primary Input Category`) %>%
  summarise(
    `Mean (%)` = round(mean(`Total Trade Dependency Share`), 1),
    `Std Dev` = round(sd(`Total Trade Dependency Share`), 1),
    `Min (%)` = round(min(`Total Trade Dependency Share`), 1),
    `Max (%)` = round(max(`Total Trade Dependency Share`), 1),
    `CV` = round(sd(`Total Trade Dependency Share`) / mean(`Total Trade Dependency Share`), 3),
    `Years Available` = n(),
    .groups = 'drop'
  ) %>%
  bind_rows(
    df_total %>%
      summarise(
        `Primary Input Category` = "Total",
        `Mean (%)` = round(mean(`Total Trade Dependency Share`), 1),
        `Std Dev` = round(sd(`Total Trade Dependency Share`), 1),
        `Min (%)` = round(min(`Total Trade Dependency Share`), 1),
        `Max (%)` = round(max(`Total Trade Dependency Share`), 1),
        `CV` = round(sd(`Total Trade Dependency Share`) / mean(`Total Trade Dependency Share`), 3),
        `Years Available` = n()
      )
  )

### Extracting number to be included to the qmd file

# Function to extract model statistics safely
extract_model_stats <- function(data) {
  if (nrow(data) < 3) {
    # Not enough data points for trend analysis
    return(data.frame(
      trend_coef = NA,
      p_value = NA,
      r_squared = NA
    ))
  }
  
  model <- lm(`Total Trade Dependency Share` ~ year, data = data)
  model_summary <- summary(model)
  
  # Extract coefficients safely
  coeffs <- model_summary$coefficients
  
  if (nrow(coeffs) < 2) {
    return(data.frame(
      trend_coef = NA,
      p_value = NA,
      r_squared = NA
    ))
  }
  
  data.frame(
    trend_coef = coeffs[2, 1],  # Coefficient for year
    p_value = coeffs[2, 4],     # P-value for year
    r_squared = model_summary$r.squared
  )
}

# Apply to categories
trend_tests_categories <- df_categories %>%
  group_by(`Primary Input Category`) %>%
  nest() %>%
  mutate(
    model_stats = map(data, extract_model_stats)
  ) %>%
  unnest(model_stats) %>%
  select(-data)

# Apply to total
trend_tests_total <- df_total %>%
  mutate(`Primary Input Category` = "Total") %>%
  nest(data = -`Primary Input Category`) %>%
  mutate(
    model_stats = map(data, extract_model_stats)
  ) %>%
  unnest(model_stats) %>%
  select(-data)

# Combine results
trend_tests <- bind_rows(trend_tests_categories, trend_tests_total) %>%
  mutate(
    stability = case_when(
      is.na(p_value) ~ "Insufficient Data",
      p_value > 0.05 ~ "Stable",
      TRUE ~ "Trending"
    )
  )

# Format the stability table for output
table_stability <- trend_tests %>%
  mutate(
    `Trend Coefficient` = round(trend_coef, 3),
    `p-value` = round(p_value, 3),
    `R²` = round(r_squared, 3),
    `Stability Assessment` = stability
  ) %>%
  select(`Primary Input Category`, `Trend Coefficient`, `p-value`, `R²`, `Stability Assessment`)

# Save tables as CSV files for reading into Quarto
write_csv(table_stability, here("data/tidy/temporal_stability_table.csv"))

